import datetime
import random
import re
import time

import openpyxl
import pandas as pd
import requests
import xlwt
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC


class FacebookSpider(object):
    """Selenium scraper that logs into Facebook and exports a group's members.

    Constructing the spider opens Chrome and submits the login form.
    ``send_request`` then loads the group's members page, scrolls it so more
    member cards are rendered, and writes one spreadsheet row per card.

    NOTE(review): the XPaths below match Facebook's generated CSS class
    names exactly, so they presumably break whenever the page markup
    changes -- verify against the live page before relying on them.
    """

    # Two page elements clicked right after login. NOTE(review): presumably
    # these navigate towards the groups area; the members URL is loaded
    # directly afterwards, so confirm whether the clicks are still needed.
    _TOP_NAV_LINK_XPATH = "//ul[@class='thodolrn ojvp67qx taijpn5t buofh1pr j83agx80 aovydwv3 bqdfd6uv']/li[@class='buofh1pr to382e16 o5zgeu5y jrc8bbd0 dawyy4b1 h676nmdw hw7htvoc'][2]/span[@class='tojvnm2t a6sixzi8 abs2jz4q a8s20v7p t1p8iaqh k5wvi7nf q3lfd5jv pk4s997a bipmatt0 cebpdrjk qowsmv63 owwhemhu dp1hu0rb dhp61c6y iyyx5f41']/div[@class='bp9cbjyn j83agx80 byvelhso l9j0dhe7']/a[@class='oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 j83agx80 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl l9j0dhe7 abiwlrkh p8dawk7l bp9cbjyn cbu4d94t datstx6m taijpn5t k4urcfbm']"
    _SIDE_ENTRY_XPATH = "//div[6]/a[@class='oajrlxb2 gs1a9yip g5ia77u1 mtkw9kbi tlpljxtp qensuy8j ppp5ayq2 goun2846 ccm00jje s44p3ltw mk2mc5f4 rt8b4zig n8ej3o3l agehan2d sk4xxmp2 rq0escxv nhd2j8a9 a8c37x1j mg4g778l btwxx1t3 pfnyh3mw p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x tgvbjcpo hpfvmrgz jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso l9j0dhe7 i1ao9s8h esuyzwwr f1sip0of du4w35lb lzcic4wl abiwlrkh p8dawk7l ue3kfks5 pw54ja7n uo3d90p7 l82x9zwi']/div[@class='ow4ym5g4 auili1gw rq0escxv j83agx80 buofh1pr g5gj957u i1fnvgqd oygrvhab cxmmr5t8 hcukyx3x kvgmc6g5 nnctdnn4 hpfvmrgz qt6c0cv9 jb3vyjys l9j0dhe7 du4w35lb bp9cbjyn btwxx1t3 dflh9lhu scb9dxdr']/div[@class='ow4ym5g4 auili1gw rq0escxv j83agx80 buofh1pr g5gj957u i1fnvgqd oygrvhab cxmmr5t8 hcukyx3x kvgmc6g5 tgvbjcpo hpfvmrgz qt6c0cv9 rz4wbd8a a8nywdso jb3vyjys du4w35lb bp9cbjyn btwxx1t3 l9j0dhe7']/div[@class='gs1a9yip ow4ym5g4 auili1gw rq0escxv j83agx80 cbu4d94t buofh1pr g5gj957u i1fnvgqd oygrvhab cxmmr5t8 hcukyx3x kvgmc6g5 tgvbjcpo hpfvmrgz rz4wbd8a a8nywdso l9j0dhe7 du4w35lb rj1gh0hx pybr56ya f10w8fjw']/div/div[@class='j83agx80 cbu4d94t ew0dbk1b irj2b8pg']/div[@class='qzhwtbm6 knvmm38d'][1]/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db a5q79mjw g1cxx5fr ekzkrbhg oo9gr5id hzawbc8m']/span[@class='a8c37x1j ni8dbmo4 stjgntxs l9j0dhe7 ojkyduve']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh jq4qci2q a3bd9o3v lrazzd5p oo9gr5id']"

    # One match per member card on the members page.
    _MEMBER_CARD_XPATH = "//div[@class='ue3kfks5 pw54ja7n uo3d90p7 l82x9zwi a8c37x1j']"

    # Lookups relative to a single member card.
    _NAME_ANCHOR_XPATH = ".//div[@class='qzhwtbm6 knvmm38d'][1]/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v ekzkrbhg oo9gr5id hzawbc8m']/span/div[@class='nc684nl6']/a[@class='oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl oo9gr5id gpro0wi8 lrazzd5p']"
    _NAME_DIV_XPATH = ".//div[@class='qzhwtbm6 knvmm38d'][1]/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v ekzkrbhg oo9gr5id hzawbc8m']/span/div[@class='nc684nl6']/div[@class='oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl oo9gr5id gpro0wi8 lrazzd5p']"
    _PROFILE_LINK_XPATH = ".//div[@class='qzhwtbm6 knvmm38d']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d3f4x2em fe6kdd0r mau55g9w c8b282yb iv3no6db jq4qci2q a3bd9o3v ekzkrbhg oo9gr5id hzawbc8m']/span/div[@class='nc684nl6']/a"
    _REFERRER_NAME_XPATH = ".//div[@class='qzhwtbm6 knvmm38d'][2]/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d9wwppkn fe6kdd0r mau55g9w c8b282yb mdeji52x e9vueds3 j5wam9gi knj5qynh m9osqain hzawbc8m']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh e9vueds3 j5wam9gi lrazzd5p m9osqain']/a[@class='oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl gmql0nx0 gpro0wi8 lrazzd5p']"
    _REFERRER_LINK_XPATH = ".//div[@class='qzhwtbm6 knvmm38d']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d9wwppkn fe6kdd0r mau55g9w c8b282yb mdeji52x e9vueds3 j5wam9gi knj5qynh m9osqain hzawbc8m']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh e9vueds3 j5wam9gi lrazzd5p m9osqain']/a"
    _JOIN_INFO_XPATH = ".//div[@class='qzhwtbm6 knvmm38d']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 d9wwppkn fe6kdd0r mau55g9w c8b282yb mdeji52x e9vueds3 j5wam9gi knj5qynh m9osqain hzawbc8m']/span[@class='d2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh e9vueds3 j5wam9gi lrazzd5p m9osqain']"

    def __init__(self, email='jim_clear@163.com', password='jimclear'):
        """Open Chrome, load the Facebook login page and submit the form.

        Args:
            email: Account e-mail typed into the login form.
            password: Account password typed into the login form.

        NOTE(review): the defaults are the credentials that were previously
        hard-coded here and are kept only for backward compatibility;
        callers should pass their own, ideally read from configuration.
        """
        # Pick the browser.
        self.browser = webdriver.Chrome()
        # Open the Facebook login page.
        self.browser.get('https://www.facebook.com/login.php?login_attempt=1&lwv=110/')

        # Fill in the account name and password.
        email_box = self.browser.find_element(By.ID, 'email')
        email_box.clear()
        email_box.send_keys(email)
        password_box = self.browser.find_element(By.ID, 'pass')
        password_box.clear()
        password_box.send_keys(password)

        # Submit by pressing Enter on the login button. This is best-effort:
        # if the button cannot be located or activated, just wait instead of
        # aborting.
        try:
            self.browser.find_element(
                By.XPATH, '//button[@id="loginbutton"]').send_keys(Keys.ENTER)
        except Exception:
            time.sleep(6)

        # Accept a JavaScript alert if one popped up after logging in.
        if EC.alert_is_present()(self.browser):
            # `switch_to.alert` replaces `switch_to_alert()`, which newer
            # Selenium releases no longer provide.
            alert = self.browser.switch_to.alert
            time.sleep(1)
            alert.accept()

    @staticmethod
    def _read_optional(card, xpath, default, attribute=None):
        """Return an element's text (or *attribute*) below *card*, or *default*.

        Any failure during the lookup yields *default*, so a member card that
        lacks the optional element does not abort the scrape.
        """
        try:
            node = card.find_element(By.XPATH, xpath)
            if attribute is None:
                return node.text
            return node.get_attribute(attribute)
        except Exception:
            return default

    @staticmethod
    def _parse_join_time(raw_text, referrer_name, today, yesterday):
        """Extract the join time from an "Added by"/"Invited by" caption.

        Args:
            raw_text: Caption text such as ``'Added by <name> Today'``.
            referrer_name: Name expected right after the verb.
            today: Value returned when the caption ends in ``'Today'``.
            yesterday: Value returned when it ends in ``'Yesterday'``.

        Returns:
            *today* / *yesterday* for the relative words, otherwise whatever
            follows ``'<verb> <referrer_name> '``. If the caption uses neither
            verb, or the referrer name does not follow the verb (for example
            because the name lookup fell back to ``'/'``), *raw_text* is
            returned unchanged.
        """
        for verb in ('Added by', 'Invited by'):
            if not raw_text.startswith(verb):
                continue
            # Plain prefix matching rather than a regex: names may contain
            # regex metacharacters, and a non-matching caption must degrade
            # to the raw text instead of raising.
            lead = f'{verb} {referrer_name} '
            if not raw_text.startswith(lead):
                return raw_text
            when = raw_text[len(lead):]
            if when == 'Today':
                return today
            if when == 'Yesterday':
                return yesterday
            return when
        return raw_text

    def send_request(self,
                     group_members_url="https://www.facebook.com/groups/565495247379394/members",
                     output_path='FaceBook2.xlsx'):
        """Scrape the group's member cards and save them to a workbook.

        Args:
            group_members_url: Members page of the group to scrape.
            output_path: Path of the ``.xlsx`` file to write.

        Each member becomes one row on sheet ``'sheet1'`` with the columns:
        member name, member profile link, referrer name, referrer link and
        join time. The workbook is saved even when scraping fails part-way,
        so rows collected so far are kept; the error still propagates.
        """
        time.sleep(6)
        self.browser.find_element(By.XPATH, self._TOP_NAV_LINK_XPATH).click()
        time.sleep(random.random() * 10 + 3)
        self.browser.find_element(By.XPATH, self._SIDE_ENTRY_XPATH).click()
        time.sleep(random.random() * 10 + 3)

        self.browser.get(group_members_url)

        # Scroll 250 times, 3000px further each time with a 5s pause, so the
        # page can render additional member cards.
        scroll_y = 1000
        for progress in range(10, 2510, 10):
            self.browser.execute_script(f"window.scrollTo(0,{scroll_y})")
            print(progress, '*************************************', scroll_y)
            scroll_y += 3000
            time.sleep(5)

        # Dates substituted for the relative "Today" / "Yesterday" captions.
        today = datetime.date.today()
        yesterday = today - datetime.timedelta(days=1)
        time.sleep(5)

        # The first five matches are skipped. NOTE(review): presumably they
        # are not regular member entries -- confirm against the page.
        cards = self.browser.find_elements(By.XPATH, self._MEMBER_CARD_XPATH)[5:]

        # Workbook() already contains a default sheet; rows go to the extra
        # sheet 'sheet1', leaving the default one empty.
        workbook = openpyxl.Workbook()
        sheet = workbook.create_sheet('sheet1')
        try:
            # Rows start at 2, leaving row 1 empty.
            for row, card in enumerate(cards, start=2):
                time.sleep(3)

                # Member's own name: normally an <a>, otherwise a <div> with
                # the same classes. If neither exists the error propagates.
                try:
                    member_name = card.find_element(
                        By.XPATH, self._NAME_ANCHOR_XPATH).text
                except Exception:
                    member_name = card.find_element(
                        By.XPATH, self._NAME_DIV_XPATH).text

                # Member's profile link.
                member_link = self._read_optional(
                    card, self._PROFILE_LINK_XPATH, '/', attribute='href')
                # Referrer's name.
                referrer_name = self._read_optional(
                    card, self._REFERRER_NAME_XPATH, '/')
                # Referrer's profile link.
                referrer_link = self._read_optional(
                    card, self._REFERRER_LINK_XPATH, '////', attribute='href')
                # Join time, taken from the "Added by ..." caption.
                join_caption = self._read_optional(
                    card, self._JOIN_INFO_XPATH, '/')
                joined = self._parse_join_time(
                    join_caption, referrer_name, today, yesterday)

                summary = f':本人名字{member_name},本人名字链接{member_link},介绍人名字{referrer_name},介绍人链接{referrer_link},时间{joined}'
                values = (member_name, member_link, referrer_name,
                          referrer_link, joined)
                for column, value in enumerate(values, start=1):
                    # Everything is stored as text, dates included.
                    sheet.cell(row=row, column=column).value = f'{value}'
                print(summary)
        finally:
            # Save whatever was collected, even if a card failed above.
            workbook.save(output_path)
        print('保存成功')
if __name__ == '__main__':
    # Script entry point: constructing the spider logs in, then
    # send_request() scrapes the member list and writes the workbook.
    FacebookSpider().send_request()



